import scrapy
from scrapy import cmdline


class SouhuSpider(scrapy.Spider):
    """Crawl the linkshop.com news listing and print each article's URL and title.

    NOTE(review): the spider is named "souhu" but actually targets
    www.linkshop.com — the name looks stale; confirm with the project.
    """

    name = "souhu"
    allowed_domains = ["www.linkshop.com"]
    start_urls = ["http://www.linkshop.com/news/"]

    def parse(self, response):
        """Extract (url, title) pairs from the news listing page and print them.

        Bug fix: the original assigned the anchor *text* to ``detail_url``
        and the ``@href`` to ``title`` — the two xpath expressions were
        swapped, so every printed line had its fields reversed.

        :param response: Scrapy HTTP response for a listing page.
        """
        # Renamed from `list` — shadowing the builtin hides list() below.
        sections = response.xpath(
            '/html/body/div[3]/article/section[1]/article[2]/div[1]/div/section[1]'
        )
        for section in sections:
            for node in section.xpath('.//div'):
                # @href is the article link; text() is the headline.
                detail_url = node.xpath(".//div/h2/a/@href").get()
                title = node.xpath(".//div/h2/a/text()").get()
                # Skip wrapper <div>s that carry no article anchor.
                if detail_url is None or title is None:
                    continue
                print('url=' + detail_url + ',title=' + title)



if __name__ == '__main__':
    # Launch this spider through Scrapy's CLI entry point when the
    # file is executed directly (equivalent to `scrapy crawl souhu`).
    cmdline.execute(["scrapy", "crawl", "souhu"])
